### Reproducing appendix figures

#loading required packages ####
library(ggplot2)
library(dplyr)
library(grid)
library(gridExtra)
library(coefplot)
library(Hmisc)
library(ggrepel)
library(lme4)
library(kableExtra)
library(matrixStats)
library(mice)
library(janitor)
library(psych)
library(GPArotation)
library(here)
library(stringr)
library(lubridate)
library(tidyr)
# Bind the bare name `summarize` to dplyr's implementation for the rest of the
# script (presumably because Hmisc, attached after dplyr above, exports its
# own `summarize` -- confirm against the attach order).
summarize <- dplyr::summarize
# Make the directory returned by here() the working directory, so the bare
# relative paths used below (e.g. "nameratings.csv", "SI Figures/...") are
# resolved against it.
setwd(here())

## Most Distinctive Names ####
# Lists, for every race x gender group, the ten names with the highest
# `distinct` value in nameratings.csv.
dat <- read.csv("nameratings.csv")

# Print explicitly: top-level values are not auto-printed when the script is
# run through source(), and a grouped tibble otherwise shows only its first
# ten rows, which would hide every group after the first.
dat %>%
  group_by(race, gender) %>%
  slice_max(distinct, n = 10) %>%
  print(n = Inf)

### Factor Analysis ####
# Imputes missing trait ratings, then fits a one-factor model on eight trait
# ratings and stores the factor score of every row in `dat$fa_score`.

dat <- read.csv("comb_traitratings.csv")
#remove observations rated only on race
dat <- dat[!is.na(dat$a.Democrat) | !is.na(dat$a.man), ]

#impute missing characteristics
# mice() draws its imputations at random. Without a seed the factor scores
# (and every figure built from them) change from run to run, so the seed is
# fixed here; the value itself is arbitrary.
impobj <- mice(dat, seed = 20210101)
# Stack the completed data sets and collapse them to one row per original
# observation (.id) by taking the median of every column.
dat <- complete(impobj, "long") %>%
  group_by(.id) %>%
  dplyr::summarize(across(everything(), median))

#fit factor analysis model
fit <- psych::fa(
  select(dat, traditional, compassionate, competent, honest,
         intelligent, likable, professional, warm),
  nfactors = 1, fm = "pa"
)
# Explicit print so the model summary is also shown when the script is sourced.
print(fit)
dat$fa_score <- fit$scores[, 1]

### Intragroup variation ####
# Plots, within every race x gender group, the five names with the lowest and
# the five with the highest factor score. Relies on `dat` carrying the
# `fa_score` column computed in the factor-analysis section.
datb <- read.csv("nameratings.csv")

## first, assign gender and race to distinctive names
dat <- merge(dat, select(datb, name, race, gender))

# Five lowest-scoring names per group (slice_min returns them in ascending
# order of fa_score).
lowest <- dat %>%
  group_by(race, gender) %>%
  slice_min(fa_score, n = 5) %>%
  select(name)

# Five highest-scoring names per group, re-sorted ascending so the y axis
# reads from low to high.
highest <- dat %>%
  group_by(race, gender) %>%
  slice_max(fa_score, n = 5) %>%
  arrange(fa_score) %>%
  select(name)

# Pull the column by name instead of by position. unique() guards against a
# name landing in both halves (a group with fewer than ten names), which would
# otherwise make factor() fail on duplicated levels.
names <- unique(bind_rows(lowest, highest)$name)

a <- dat %>%
  filter(name %in% names) %>%
  mutate(name = factor(name, levels = names)) %>%
  ggplot(aes(x = fa_score, y = name)) + theme_bw() +
  geom_point() +
  xlim(c(-3, 3)) +
  facet_wrap(~gender + capitalize(race), nrow = 2,
             scales = "free") +
  xlab("Valence Score Rating") + ylab("Name")

# ggplot objects only draw when printed. Auto-printing does not happen under
# source(), which would leave an empty PDF, so print explicitly.
pdf("SI Figures/with_group.pdf", width = 10, height = 7)
print(a)
dev.off()

## Intergroup variation figure ####
# Plots the mean trait estimate of every race x gender group (rows with
# race == "asian" are excluded), one row per attribute.
#read in long and wide data
dat <- read.csv("comb_traitratings_withSEs.csv")
datb <- read.csv("nameratings.csv")

dat <- merge(dat, select(datb, name, race, gender))

# Build the plot before opening the device so that an error in the pipeline
# cannot leave a half-written PDF device open.
# NOTE(review): the legend labels below are positional; they assume the
# remaining race values sort as black/hispanic/white and that the gender value
# for women sorts before the one for men -- confirm against the data.
bw_group_plot <- dat %>%
  group_by(att, gender, race) %>%
  dplyr::summarize(mn = mean(Estimate, na.rm = TRUE)) %>%
  drop_na() %>%
  filter(race != "asian") %>%
  ggplot() +
  geom_point(aes(x = mn, y = att,
                 color = race, shape = gender)) +
  theme_bw() + xlab("Group Average Estimate") +
  ylab("Attribute") + ggtitle("Average Trait Ratings by Group") +
  scale_color_discrete(name = "Perceived Race",
                       labels = c("Black", "Hispanic", "White")) +
  scale_shape_discrete(name = "Perceived Gender",
                       labels = c("Woman", "Man"))

# Explicit print: a bare ggplot expression is not drawn when the script is
# run through source(), which would produce an empty file.
pdf("SI Figures/updated_bw_group.pdf")
print(bw_group_plot)
dev.off()



### Group-level trait ratings ####
# For every rated trait, regresses the raw ratings on the race-gender group of
# the rated first name (no intercept, random intercept per first name) and
# prints the group coefficients with standard errors as a LaTeX table.
datb <- read.csv("nameratings.csv")
#create race-gender groups
datb$group <- paste(datb$race, datb$gender)

#grab raw ratings
tab1 <- read.csv("comb_longratings.csv")
# keep only the first name of each rated "first last" name
tab1 <- tab1 %>% separate(name, c("first", "last"), " ")
tab1$last <- NULL

#count ratings per name and trait
a <- tab1 %>% group_by(first, trait) %>% dplyr::summarize(n_ratings = n())

# keep name-trait pairs with more than 15 ratings and a non-empty first name
tab1 <- merge(tab1, a, by = c("first", "trait"))
tab1 <- tab1[tab1$n_ratings > 15 & tab1$first != "", ]

#merge in groups
tab1 <- merge(tab1, select(datb, group, name), by.x = c("first"), by.y = c("name"))

#estimate group coefs for each trait
traits <- unique(tab1$trait)
# Collect one coefficient table per trait in a preallocated list and bind
# once at the end, instead of growing a data frame seeded with an NA row.
# seq_along() also avoids the 1:0 iteration that 1:length() yields for an
# empty trait vector.
fits <- vector("list", length(traits))
for (i in seq_along(traits)) {
  mod1 <- lmer(rate ~ group - 1 + (1 | first),
               data = tab1[tab1$trait == traits[i], ],
               na.action = "na.omit")
  # drop = FALSE keeps a one-row matrix when only a single group is estimated
  mod1s <- as.data.frame(summary(mod1)$coefficients[, 1:2, drop = FALSE])
  mod1s$att <- traits[i]
  mod1s$name <- rownames(mod1s)
  fits[[i]] <- mod1s
  print(i)  # progress indicator
}
res <- do.call(rbind, fits)

#format for export
# coefficient names look like "group<race> <gender>": split, then strip prefix
res <- res %>% separate(name, into = c("race", "gen"), remove = TRUE, sep = " ")
res$race <- str_remove(res$race, "group")
res$`Std. Error` <- paste0("(", round(res$`Std. Error`, 2), ")")
res$Estimate <- as.character(round(res$Estimate, 2))
# one row per attribute and statistic, one column per race-gender group
tab2 <- res %>%
  pivot_longer(cols = c(Estimate, `Std. Error`)) %>%
  pivot_wider(id_cols = c(att, name), names_from = c(race, gen), values_from = c(value)) %>%
  filter(att %in% c("warm", "professional", "likable",
                    "honest", "competent", "assertive",
                    "working class", "aggressive", "intelligent",
                    "hardworking", "violent"))

#print TeX code of results
# NOTE(review): the column and header labels are positional; they assume the
# group columns come out ordered asian, black, hispanic, white with the
# women's column first within each race -- confirm against the data.
# Printed explicitly so the table is also emitted when the script is sourced.
tab2 %>%
  select(-name) %>%
  kable(digits = 2, format = "latex", booktabs = TRUE,
        col.names = c("Trait", "W", "M", "W", "M", "W", "M", "W", "M")) %>%
  add_header_above(c(" " = 1, "Asian" = 2, "African American" = 2, "Hispanic" = 2, "White" = 2)) %>%
  print()


## Gender Accuracy ####
# Scatter of the rated likelihood that a name belongs to a woman (a.woman)
# against pct_f, labelled on the plot as the share female in the NC voter
# file, with a smoothed trend line.

dat <- read.csv("nameratings.csv")

a <- ggplot(dat, aes(x = a.woman, y = pct_f)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  theme_bw() +
  theme(text = element_text(size = 20)) +
  xlab("Rating: likelihood of being a woman") +
  ylab("Pct. Female in NC Voter File")

# ggplot objects only draw when printed. Auto-printing does not happen under
# source(), which would leave an empty PDF, so print explicitly.
pdf("SI Figures/gender_real.pdf")
print(a)
dev.off()

# Signed gaps between the "man" and "woman" ratings.
# NOTE(review): `dat` is reloaded from file right after this section, so these
# two columns are not used by anything visible in this script.
dat$man_dist <- dat$a.man - dat$a.woman
dat$fem_dist <- dat$a.woman - dat$a.man



# Figure 1: rated race of every name against the matching voter-file share,
# one panel per racial group, with six featured names drawn as larger
# gray-filled circles.
dat <- read.csv(here("comb_traitratings.csv"))
db <- read.csv(here("nc_voters_race.csv"))
dat <- dat %>% left_join(db)

# flag the featured names (1 = featured, 0 = not)
feat <- c("Emily", "Laurie", "Misty", "Octavia", "Keisha", "Latoya")
dat$select <- as.numeric(dat$name %in% feat)
featured <- dat[dat$select == 1, ]

# One panel per group. The x/y mapping is declared once per plot and inherited
# by the base points, the smoother and the highlighted points.
a <- ggplot(dat, aes(x = pctwhite, y = white)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  geom_point(data = featured, colour = "black", fill = "gray",
             shape = 21, size = 3) +
  xlab("Percent White in Voter File") +
  ylab("Rated Likelihood White") +
  theme_bw() +
  ylim(c(1.5, 4.5))

b <- ggplot(dat, aes(x = pctblack, y = African.American)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  geom_point(data = featured, colour = "black", fill = "gray",
             shape = 21, size = 3) +
  xlab("Percent Black in Voter File") +
  ylab("Rated Likelihood Black") +
  theme_bw() +
  ylim(c(1.5, 4.5))

c <- ggplot(dat, aes(x = pcthisp, y = Hispanic)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  geom_point(data = featured, colour = "black", fill = "gray",
             shape = 21, size = 3) +
  xlab("Percent Hispanic in Voter File") +
  ylab("Rated Likelihood Hispanic") +
  theme_bw() +
  ylim(c(1.5, 4.5))

d <- ggplot(dat, aes(x = pctapi, y = Asian)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  geom_point(data = featured, colour = "black", fill = "gray",
             shape = 21, size = 3) +
  xlab("Percent Asian in Voter File") +
  ylab("Rated Likelihood Asian") +
  theme_bw() +
  ylim(c(1.5, 4.5))

# 2 x 2 grid with a shared title; grid.arrange() draws onto the open device
figure_title <- textGrob("Rated Race of Names by Actual Race Distribution",
                         gp = gpar(fontsize = 17))
pdf("Figures/figure1.pdf")
grid.arrange(a, b, c, d, nrow = 2, top = figure_title)
dev.off()

## NC vs. Tzioumis ####
# Compares, name by name, the racial composition in the NC voter file with
# the composition recorded in firstnames.csv (labelled "Mortgage Data" on the
# plots), one panel per racial group.
#load data
# The first-name column used to be referenced through a byte-order-mark
# artefact in its header (a mangled prefix before "..firstname"), which is not
# a valid R name and stops the file from parsing. Reading with
# fileEncoding = "UTF-8-BOM" strips the mark so the column is plain `firstname`
# on every platform.
dat <- read.csv(here("..", "firstnames.csv"), fileEncoding = "UTF-8-BOM")
if (!"firstname" %in% colnames(dat)) {
  stop("firstnames.csv has no `firstname` column; found: ",
       paste(colnames(dat), collapse = ", "), call. = FALSE)
}
# names to "Xxxx" capitalisation before joining on `name`
dat$name <- capitalize(tolower(dat$firstname))
# drop the "pct" prefix from this file's columns so they do not clash with the
# pct* columns of the voter file joined next
colnames(dat) <- str_remove(colnames(dat), "pct")
db <- read.csv(here("nc_voters_race.csv"))
dat <- left_join(dat, db)
#produce plots for each racial group
# voter-file values are multiplied by 100 before being plotted against the
# firstnames.csv columns (presumably shares vs. percentages -- confirm)
a <- ggplot(data = dat) +
  geom_point(aes(x = pctwhite * 100, y = white), size = 2) +
  geom_smooth(aes(x = pctwhite * 100, y = white), se = FALSE) +
  xlab("Percent White in Voter File") + ylab("Percent White in Mortgage Data") + theme_bw()
b <- ggplot(data = dat) +
  geom_point(aes(x = pctblack * 100, y = black), size = 2) +
  geom_smooth(aes(x = pctblack * 100, y = black), se = FALSE) +
  xlab("Percent Black in Voter File") + ylab("Percent Black in Mortgage Data") + theme_bw()
c <- ggplot(data = dat) +
  geom_point(aes(x = pcthisp * 100, y = hispanic), size = 2) +
  geom_smooth(aes(x = pcthisp * 100, y = hispanic), se = FALSE) +
  xlab("Percent Hispanic in Voter File") + ylab("Percent Hispanic in Mortgage Data") + theme_bw()
d <- ggplot(data = dat) +
  geom_point(aes(x = pctapi * 100, y = api), size = 2) +
  geom_smooth(aes(x = pctapi * 100, y = api), se = FALSE) +
  xlab("Percent Asian in Voter File") + ylab("Percent Asian in Mortgage Data") + theme_bw()

#combine
pdf("SI Figures/voterfile_mortgage.pdf")
grid.arrange(a, b, c, d, nrow = 2, top = textGrob("Race in Voter File and Mortgage Data", gp = gpar(fontsize = 17)))
dev.off()


## repeat for ratings
# Plots the rated race of every name against the pct* columns of
# firstnames.csv (labelled "Mortgage Data" on the plots), one panel per group.
dat <- read.csv(here("comb_traitratings.csv"))
# The first-name column used to be referenced through a byte-order-mark
# artefact in its header, which is not a valid R name and stops the file from
# parsing. Reading with fileEncoding = "UTF-8-BOM" strips the mark so the
# column is plain `firstname` on every platform.
db <- read.csv(here("..", "firstnames.csv"), fileEncoding = "UTF-8-BOM")
if (!"firstname" %in% colnames(db)) {
  stop("firstnames.csv has no `firstname` column; found: ",
       paste(colnames(db), collapse = ", "), call. = FALSE)
}
# names to "Xxxx" capitalisation before joining on `name`
db$name <- capitalize(tolower(db$firstname))
dat <- left_join(dat, db)

#produce plots for each racial group
a <- ggplot(data = dat) +
  geom_point(aes(x = pctwhite, y = white)) +
  geom_smooth(aes(x = pctwhite, y = white), se = FALSE) +
  xlab("Percent White in Mortgage Data") + ylab("Rated Likelihood White") + theme_bw() + ylim(c(1.5, 4.5))
b <- ggplot(data = dat) +
  geom_point(aes(x = pctblack, y = African.American)) +
  geom_smooth(aes(x = pctblack, y = African.American), se = FALSE) +
  xlab("Percent Black in Mortgage Data") + ylab("Rated Likelihood Black") + theme_bw() + ylim(c(1.5, 4.5))
c <- ggplot(data = dat) +
  geom_point(aes(x = pcthispanic, y = Hispanic)) +
  geom_smooth(aes(x = pcthispanic, y = Hispanic), se = FALSE) +
  xlab("Percent Hispanic in Mortgage Data") + ylab("Rated Likelihood Hispanic") + theme_bw() + ylim(c(1.5, 4.5))
d <- ggplot(data = dat) +
  geom_point(aes(x = pctapi, y = Asian)) +
  geom_smooth(aes(x = pctapi, y = Asian), se = FALSE) +
  xlab("Percent Asian in Mortgage Data") + ylab("Rated Likelihood Asian") + theme_bw() + ylim(c(1.5, 4.5))

#combine
pdf("SI Figures/mortgage_vs_rated.pdf")
grid.arrange(a, b, c, d, nrow = 2, top = textGrob("Rated Race of Names by Actual Race Distribution", gp = gpar(fontsize = 17)))
dev.off()



## Full Attribute Plot for Experimental Names ####
# Dot-and-interval plot of every attribute estimate (+/- 1.96 standard errors)
# for the six experimental names.

dat <- read.csv("mturk_nametraits_withSEs.csv")
# columns are renamed by position
colnames(dat) <- c("Estimate", "Std..Error", "att", "name")

#select names and attributes for figure
names4fig <- c("Emily", "Laurie", "Latoya", "Keisha", "Octavia", "Misty")
subtable2 <- dat[(dat$name %in% names4fig), ]

#reformat for plot
subtable2$att <- capitalize(subtable2$att)
subtable2$name <- factor(subtable2$name, levels = c("Emily", "Laurie", "Misty", "Latoya", "Keisha", "Octavia"))
# sort rows in reverse factor order
subtable2 <- with(subtable2, subtable2[order(-as.numeric(name)), ])

#select final subset for plot
white <- c("Emily", "Laurie", "Misty")
black <- c("Octavia", "Latoya", "Keisha")

#create plot
# The two name sets are drawn as separate layers. `fill = name` in the
# error-bar layers is presumably there only so the bars are grouped (and
# dodged) by name -- confirm.
# NOTE(review): position_dodgev() is not a ggplot2 function; it is presumably
# supplied by one of the packages attached at the top of the script (coefplot?)
# -- confirm.
# NOTE(review): bold labels are supplied for only four of the six breaks
# (Misty and Octavia have none) -- confirm this is intended.
a <- ggplot() +
  geom_errorbarh(data = subtable2[subtable2$name %in% white, ],
                 aes(xmin = (Estimate - 1.96 * `Std..Error`),
                     xmax = (Estimate + 1.96 * `Std..Error`), y = att, fill = name),
                 colour = "gray69", height = .5,
                 position = position_dodgev(height = -.5)) +
  geom_errorbarh(data = subtable2[subtable2$name %in% black, ],
                 aes(xmin = (Estimate - 1.96 * `Std..Error`),
                     xmax = (Estimate + 1.96 * `Std..Error`), y = att, fill = name),
                 colour = "gray69", height = .5,
                 position = position_dodgev(height = -.5)) +
  geom_point(data = subtable2[subtable2$name %in% white, ],
             aes(x = Estimate, y = att, shape = name), size = 3,
             position = position_dodgev(height = -.5)) +
  geom_point(data = subtable2[subtable2$name %in% black, ],
             aes(x = Estimate, y = att, shape = name), size = 3,
             position = position_dodgev(height = -.5)) +
  theme_bw() + #ggtitle("Attribute Ratings for Black and White Women's Names") +
  scale_shape_manual(values = c("Emily" = 0, "Laurie" = 1, "Misty" = 2,
                                "Latoya" = 15, "Keisha" = 16, "Octavia" = 17),
                     name = "Name",
                     breaks = c("Emily", "Laurie", "Misty", "Latoya", "Keisha", "Octavia"),
                     labels = c("Emily" = expression(bold(Emily)),
                                "Laurie" = expression(bold(Laurie)),
                                "Latoya" = expression(bold(Latoya)),
                                "Keisha" = expression(bold(Keisha)))) +
  ylab("Attribute") + xlab("Estimated Rating")

# ggplot objects only draw when printed. Auto-printing does not happen under
# source(), which would leave an empty PDF, so print explicitly.
pdf("SI Figures/full_desante_nametraits.pdf")
print(a)
dev.off()



## Experimental results: excellent workers ####
# Bar chart of the mean dollars allocated to each recipient, with +/- 1.96 SE
# error bars, for the four "Excellent ..." treatment conditions.

dat <- read.csv(here("experiment_rawdata.csv"))

#create summary table of all allocations
# One pass over the treatment conditions instead of seven separate summaries
# glued together with cbind(). The "SD ..." columns hold sd / sqrt(rows in
# the condition), i.e. standard errors.
# NOTE(review): the divisor n() counts every row of the condition, including
# rows whose allocation is NA and therefore excluded from sd() -- confirm this
# is the intended N.
tab1 <- dat %>%
  group_by(tc) %>%
  dplyr::summarize(
    `Emily $` = mean(embucks, na.rm = TRUE),
    `Other $` = mean(oppbucks, na.rm = TRUE),
    `State $` = mean(stbucks, na.rm = TRUE),
    `SD Em` = sd(embucks, na.rm = TRUE) / sqrt(n()),
    `SD Opp` = sd(oppbucks, na.rm = TRUE) / sqrt(n()),
    `SD St` = sd(stbucks, na.rm = TRUE) / sqrt(n()),
    N = n()
  ) %>%
  select(-tc) %>%
  as.data.frame()
# NOTE(review): the labels are assigned by position, i.e. they assume the
# sorted values of `tc` correspond to exactly this order -- confirm.
rownames(tab1) <- c("Excellent Keisha", "Excellent Laurie", "Excellent Misty",
                    "Excellent Octavia", "Poor Keisha", "Poor Laurie", "Poor Misty", "Poor Octavia")

#restructure to long for plotting purposes
# Built directly as typed columns, which avoids pushing the numbers through a
# character matrix and parsing them back.
tab4 <- data.frame(
  bucks = c(tab1$`Emily $`, tab1$`Other $`, tab1$`State $`),
  sd = c(tab1$`SD Em`, tab1$`SD Opp`, tab1$`SD St`),
  n = rep(tab1$N, times = 3),
  condition = rep(rownames(tab1), times = 3),
  recipient = rep(c("em", "op", "st"), each = nrow(tab1))
)

#create subsets for plotting
pooronly <- c("Poor Keisha", "Poor Laurie", "Poor Misty", "Poor Octavia")
exonly <- c("Excellent Keisha", "Excellent Laurie", "Excellent Misty", "Excellent Octavia")

#change labels and orders
tab4$recipient <- factor(tab4$recipient, levels = c("em", "op", "st"))
tab4$condition <- factor(tab4$condition, levels = c("Excellent Keisha", "Excellent Laurie", "Excellent Misty",
                                                    "Excellent Octavia", "Poor Laurie", "Poor Octavia", "Poor Keisha", "Poor Misty"))

#create plot
# The bars are all placed at x = "op" and dodged by recipient with width 3,
# while the error bars are placed at x = recipient.
excellent_plot <- ggplot(data = tab4[tab4$condition %in% exonly, ]) +
  geom_bar(aes(x = "op", y = bucks, fill = recipient), stat = "identity", position = "dodge", colour = "black", width = 3) +
  theme_bw() + facet_wrap(~condition, nrow = 1) +
  xlab("") +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) + ylab("Average Dollars Allocated") +
  scale_fill_manual(name = "Recipient", labels = c("Excellent Emily", "Treatment Name", "State"), values = c("grey38", "grey69", "gray100")) +
  geom_errorbar(aes(x = recipient, ymin = bucks - (1.96 * (sd)), ymax = bucks + (1.96 * (sd))), position = "dodge", width = .5)

# ggplot objects only draw when printed. Auto-printing does not happen under
# source(), which would leave an empty PDF, so print explicitly.
pdf(here("SI Figures/excellentworkers.pdf"), width = 12, height = 5)
print(excellent_plot)
dev.off()

## Replication: Sampling Method results ####
# Mean amounts awarded to each recipient in the replication study, split by
# the type (excellent / poor) and race of the other recipient, with
# +/- 1.96 SE error bars.

# drop the first data row (presumably a second header row in the survey
# export -- confirm)
dat <- read.csv(here("Welfare_study_rr_rep.csv"))[-c(1), ]

# key linking each displayed image to the name it shows
key <- read.csv(here("..", "Replication", "qualtrics_image_IDs.csv"))
# "e" when the image file name contains "Excellent", "p" otherwise
key$type <- grepl("Excellent", key$file)
key$type <- ifelse(key$type == TRUE, "e", "p")
races <- read.csv(here("..", "nameratings.csv"))
# strip stray double quotes and commas from the name field
key$firstname <- str_remove_all(key$firstname, '"|,')
key <- merge(key, races %>% select(name, race), by.x = "firstname", by.y = "name")

# reduce both URL fields to the part starting at "IM" so they can be matched
key$full_url <- str_remove(key$full_url, ".*?(?=IM)")
key$full_url <- str_remove(key$full_url, '\",')
dat$image001 <- str_remove(dat$image001, ".*?(?=IM)")

dat <- merge(dat, key, by.x = "image001", by.y = "full_url")

dat <- dat %>%
  rename("excellent_emily" = Q19_1,
         "other" = Q19_2,
         "budget" = Q19_3)
dat <- dat %>%
  mutate(across(excellent_emily:budget, as.numeric))

# NOTE(review): every standard error divides by the constant sqrt(508) rather
# than by the size of the type x race cell -- confirm 508 is the intended N.
a <- dat %>%
  group_by(type, race) %>%
  dplyr::summarize(emily = mean(excellent_emily),
                   opp = mean(other),
                   state = mean(budget),
                   em_se = sd(excellent_emily) / sqrt(508),
                   opp_se = sd(other) / sqrt(508),
                   st_se = sd(budget) / sqrt(508)) %>%
  pivot_longer(cols = emily:state,
               names_to = "recip",
               values_to = "bucks") %>%
  # attach to each recipient row the standard error that belongs to it
  mutate(se = case_when(recip == "emily" ~ em_se,
                        recip == "opp" ~ opp_se,
                        recip == "state" ~ st_se)) %>%
  select(-em_se, -opp_se, -st_se) %>%
  mutate(type = case_when(type == "e" ~ "Excellent (other recip.)",
                          type == "p" ~ "Poor (other recip.)")) %>%
  ggplot() +
  geom_bar(aes(x = recip, y = bucks, fill = race),
           stat = "identity", position = "dodge") +
  geom_errorbar(aes(x = recip, color = race,
                    ymin = bucks - 1.96 * se,
                    ymax = bucks + 1.96 * se),
                position = "dodge",
                show.legend = FALSE) +
  theme_bw() +
  xlab("Recipient") + ylab("Amount Awarded") +
  scale_fill_manual(name = "Race of Other Recipient",
                    values = c("black", "gray70"),
                    labels = c("Black", "White")) +
  # error-bar colours are the reverse of the fill colours
  scale_color_manual(values = c("gray70", "black")) +
  scale_x_discrete(labels = c("Excellent Emily", "Other Recipient", "State Budget")) +
  facet_wrap(~type) +
  theme(text = element_text(size = 15))

# ggplot objects only draw when printed. Auto-printing does not happen under
# source(), which would leave an empty PDF, so print explicitly.
pdf(here("SI Figures/randomization_replication_results.pdf"), width = 12, height = 5)
print(a)
dev.off()
